Gráfico¶

In [ ]:
library("fredr")
library("tidyverse")
library("scales")
library("vroom")
library("ggplot2")
library("dplyr")
library("scales")
library("zoo")
library("gapminder")
library("gganimate")
library("utf8")
library("gridExtra")
library("socviz")
library("ggrepel")
Sys.setlocale("LC_ALL", "pt_br.utf-8")
library("lubridate")
library("readr")
library("ggthemes")
library("maps")
library("ggpomological")
library("ggthemr")
library("extrafont")
library("stringr")
'pt_br.utf-8/pt_br.utf-8/pt_br.utf-8/C/pt_br.utf-8/C'

theme_solarized()
theme_pomological()
scale_colour_solarized()
theme_wsj()

In [ ]:
# obs:
# mudança na escala feita da seguinte maneira:
options(repr.plot.width=15, repr.plot.height=8)
In [ ]:
# font_import()
In [ ]:
runners_data = read.csv("Runners.csv")
glimpse(runners_data)
Rows: 18,244
Columns: 10
$ Rank          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14, 15, 16, 17,…
$ Time          <chr> "00:01:40.910000", "00:01:41.010000", "00:01:41.090000",…
$ Name          <chr> "David Rudisha", "David Rudisha", "David Rudisha", "Wils…
$ Country       <chr> "KEN", "KEN", "KEN", "DEN", "DEN", "KEN", "KEN", "KEN", …
$ Date.of.Birth <chr> "1988-12-17", "1988-12-17", "1988-12-17", "1970-12-12", …
$ Place         <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,…
$ City          <chr> "London", "Rieti", "Berlin", "Köln", "Zürich", "Rieti", …
$ Date          <chr> "2012-09-08", "2010-08-29", "2010-08-22", "1997-08-24", …
$ Gender        <chr> "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", …
$ Event         <chr> "800 m", "800 m", "800 m", "800 m", "800 m", "800 m", "8…
In [ ]:
runners_data$Date <- as.POSIXct(runners_data$Date, format = "%Y-%m-%d")
head(runners_data)
A data.frame: 6 x 10
RankTimeNameCountryDate.of.BirthPlaceCityDateGenderEvent
<int><chr><chr><chr><chr><int><chr><dttm><chr><chr>
1100:01:40.910000David Rudisha KEN1988-12-171London2012-09-08Men800 m
2200:01:41.010000David Rudisha KEN1988-12-171Rieti 2010-08-29Men800 m
3300:01:41.090000David Rudisha KEN1988-12-171Berlin2010-08-22Men800 m
4400:01:41.110000Wilson KipketerDEN1970-12-121Köln 1997-08-24Men800 m
5500:01:41.240000Wilson KipketerDEN1970-12-121Zürich1997-08-13Men800 m
6600:01:41.330000David Rudisha KEN1988-12-171Rieti 2011-10-09Men800 m
In [ ]:
runners_data <- runners_data |>
mutate(
    Age = year(runners_data$Date) - year(runners_data$Date.of.Birth	)
)
glimpse(runners_data)
Rows: 18,244
Columns: 11
$ Rank          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14, 15, 16, 17,…
$ Time          <chr> "00:01:40.910000", "00:01:41.010000", "00:01:41.090000",…
$ Name          <chr> "David Rudisha", "David Rudisha", "David Rudisha", "Wils…
$ Country       <chr> "KEN", "KEN", "KEN", "DEN", "DEN", "KEN", "KEN", "KEN", …
$ Date.of.Birth <chr> "1988-12-17", "1988-12-17", "1988-12-17", "1970-12-12", …
$ Place         <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,…
$ City          <chr> "London", "Rieti", "Berlin", "Köln", "Zürich", "Rieti", …
$ Date          <dttm> 2012-09-08, 2010-08-29, 2010-08-22, 1997-08-24, 1997-08…
$ Gender        <chr> "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", …
$ Event         <chr> "800 m", "800 m", "800 m", "800 m", "800 m", "800 m", "8…
$ Age           <dbl> 24, 22, 22, 27, 27, 23, 22, 24, 25, 27, 18, 24, 21, 26, …
In [ ]:
color_per_gender <- c("Men" = "#2986cc", "Women" = "#d5a6bd")
color_per_medal <- c("Gold_medals" = "#D6AF36", "Silver_medals"= "#A7A7AD", "Bronze_medals" = "#A77044")

Performance entre sexo¶

Vamos começar analisando a performace entre homens e mulhes

In [ ]:
runners_women <- runners_data |>
filter(Gender == "Women")

runners_men <- runners_data |>
filter(Gender == "Men")

runners_men$Year <- year(runners_men$Date)
runners_women$Year <- year(runners_women$Date)
runners_data$Year <- year(runners_data$Date)
In [ ]:
glimpse(runners_data)
Rows: 18,244
Columns: 12
$ Rank          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14, 15, 16, 17,…
$ Time          <chr> "00:01:40.910000", "00:01:41.010000", "00:01:41.090000",…
$ Name          <chr> "David Rudisha", "David Rudisha", "David Rudisha", "Wils…
$ Country       <chr> "KEN", "KEN", "KEN", "DEN", "DEN", "KEN", "KEN", "KEN", …
$ Date.of.Birth <chr> "1988-12-17", "1988-12-17", "1988-12-17", "1970-12-12", …
$ Place         <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,…
$ City          <chr> "London", "Rieti", "Berlin", "Köln", "Zürich", "Rieti", …
$ Date          <dttm> 2012-09-08, 2010-08-29, 2010-08-22, 1997-08-24, 1997-08…
$ Gender        <chr> "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", …
$ Event         <chr> "800 m", "800 m", "800 m", "800 m", "800 m", "800 m", "8…
$ Age           <dbl> 24, 22, 22, 27, 27, 23, 22, 24, 25, 27, 18, 24, 21, 26, …
$ Year          <dbl> 2012, 2010, 2010, 1997, 1997, 2011, 2010, 2012, 1981, 19…
In [ ]:
performance_runners_men <- runners_men |>
group_by(Year) |>
filter(Year <= 2016) |>
summarise(
    Gold_medals = sum(Place == 1,na.rm = TRUE),
    Silver_medals = sum(Place == 2,na.rm = TRUE),
    Bronze_medals = sum(Place == 3,na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
)
glimpse(performance_runners_men)
Rows: 52
Columns: 5
$ Year          <dbl> 1962, 1965, 1966, 1967, 1968, 1969, 1971, 1972, 1973, 19…
$ Gold_medals   <int> 1, 1, 1, 2, 7, 1, 2, 5, 3, 10, 7, 7, 16, 14, 14, 15, 18,…
$ Silver_medals <int> 0, 0, 0, 0, 4, 0, 0, 2, 0, 1, 0, 3, 4, 2, 0, 3, 1, 8, 7,…
$ Bronze_medals <int> 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 2, 4,…
$ Total         <int> 1, 1, 1, 2, 14, 1, 2, 7, 3, 12, 7, 11, 21, 17, 14, 19, 1…
In [ ]:
performance_runners_women <- runners_women |>
group_by(Year) |>
filter(Year <= 2016) |>
summarise(
    Gold_medals = sum(Place == 1,na.rm = TRUE),
    Silver_medals = sum(Place == 2,na.rm = TRUE),
    Bronze_medals = sum(Place == 3,na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
)
glimpse(performance_runners_women)
Rows: 45
Columns: 5
$ Year          <dbl> 1964, 1972, 1973, 1974, 1976, 1977, 1978, 1979, 1980, 19…
$ Gold_medals   <int> 1, 2, 3, 8, 28, 12, 22, 47, 59, 49, 59, 62, 119, 78, 90,…
$ Silver_medals <int> 0, 1, 0, 0, 13, 2, 8, 13, 27, 15, 18, 26, 54, 33, 30, 30…
$ Bronze_medals <int> 0, 0, 0, 0, 7, 0, 5, 7, 16, 8, 9, 12, 25, 11, 8, 14, 26,…
$ Total         <int> 1, 3, 3, 8, 48, 14, 35, 67, 102, 72, 86, 100, 198, 122, …
In [ ]:
performance_runners_men$Gender <- "Men"
performance_runners_women$Gender <- "Women"
combined_data_performance_runners <- rbind(performance_runners_men, performance_runners_women)
subtitle_text <- "Diferença entre a quantidade de medalhas de homens e mulheres ao passar do tempo"
wrapped_subtitle <- str_wrap(subtitle_text, width = 73)  

ggplot(data = combined_data_performance_runners, aes(x = Year, y = Total, color = Gender)) + geom_line(alpha = 0.4) + scale_x_continuous(breaks = seq(1960, 2015, 10)) +
geom_smooth(se = FALSE) + theme_wsj(color = "gray") + scale_color_manual(values = color_per_gender)  +
labs(
    title = "Performance dos corredores",
    subtitle = wrapped_subtitle
) + theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
In [ ]:
subtitle_text <- "Separando o desempenho dos competidores em categorias de medalhas conquistadas,ouro,prata e bronze, respectivamente"
wrapped_subtitle <- str_wrap(subtitle_text, width = 120)  
gold_medal_plot <- ggplot(data = combined_data_performance_runners, mapping = aes(y = Gold_medals, x  = Year,color = Gender)) + geom_line() + theme_wsj(color = "gray") +
scale_color_manual(values = color_per_gender) + labs(
    title  = "Medalhas entre homens e mulheres",
     subtitle = wrapped_subtitle
)  + theme(
    plot.title = element_text(hjust = 0.5,size = 15),
    plot.subtitle = element_text(size = 15)
) + guides(color = FALSE)
In [ ]:
silver_medal_plot <- ggplot(data = combined_data_performance_runners, mapping = aes(y = Silver_medals, x  = Year,color = Gender)) + geom_line() + theme_wsj(color = "gray") + guides(color = FALSE)+ 
scale_color_manual(values = color_per_gender) + labs(
    y = "medalhas de prata"
)
In [ ]:
bronze_medal_plot <- ggplot(data = combined_data_performance_runners, mapping = aes(y = Silver_medals, x  = Year,color = Gender)) + geom_line() + theme_wsj(color = "gray") + 
scale_color_manual(values = color_per_gender) + labs(
    y = "medalhas de bronze"
) + theme(
    legend.position = "bottom"
)
In [ ]:
grid.arrange(gold_medal_plot,silver_medal_plot,bronze_medal_plot)
In [ ]:
head(combined_data_performance_runners)
A tibble: 6 x 6
YearGold_medalsSilver_medalsBronze_medalsTotalGender
<dbl><int><int><int><int><chr>
1962100 1Men
1965100 1Men
1966100 1Men
1967200 2Men
196874314Men
1969100 1Men
In [ ]:
subtitle_text <- "Separando o desempenho dos competidores em categorias de medalhas conquistadas,ouro,prata e bronze, respectivamente"
wrapped_subtitle <- str_wrap(subtitle_text, width = 120)
In [ ]:
gold_medal_plot  <- ggplot(data = combined_data_performance_runners,mapping = aes(x = Gold_medals, fill = Gender)) + geom_histogram(bins = 20) + theme_wsj(color = "gray") + 
scale_color_manual(values = color_per_gender) + scale_fill_manual(values = color_per_gender)+ guides(fill = FALSE) + labs(title = "Quantidade de medalhas",subtitle = wrapped_subtitle) + 
theme(plot.title = element_text(hjust = 0.5),plot.subtitle = element_text(size = 15))

silver_medal_plot <- ggplot(data = combined_data_performance_runners,mapping = aes(x = Silver_medals, fill = Gender)) + geom_histogram(bins = 20) + theme_wsj(color = "gray") + guides(fill = FALSE) + scale_fill_manual(values = color_per_gender)

bronze_medal_plot <- ggplot(data = combined_data_performance_runners,mapping = aes(x = Bronze_medals, fill = Gender)) + geom_histogram(bins = 20) + theme_wsj(color = "gray")  + scale_fill_manual(values = color_per_gender) + theme(legend.position = "bottom")

grid.arrange(gold_medal_plot,silver_medal_plot,bronze_medal_plot)

Performance por país¶

In [ ]:
performance_country <- runners_data |>
filter(Place == 1 | Place == 2 | Place == 3) |>
count(Country,sort = TRUE)

performance_country_filtered <- head(performance_country$Country,9)
performance_country_filtered <- as.list(performance_country_filtered)
In [ ]:
countries_performance_per_year <- runners_data |>
group_by(Country,Year) |>
filter(Place == 1 | Place == 2 | Place == 3, Year <= 2016) |>
count(Country,sort =  TRUE) |>
rename(total = n)  

top_countries_performance_per_year <- countries_performance_per_year |> filter(Country %in% performance_country_filtered)
In [ ]:
subtitle_text <- "Diferença entre a quantidade de medalhas dos países ao passar do tempo"
wrapped_subtitle <- str_wrap(subtitle_text, width = 73)  

ggplot(data = top_countries_performance_per_year, mapping = aes(x = Year, y = total)) + geom_line(mapping = aes(group = Country)) + 
facet_wrap(~Country, ncol = 3)  + theme_economist_white() + 
labs(title = "Quantidade de medalhas conquitadas por País", x = NULL, y = NULL) + 
theme(plot.title = element_text(hjust = 0.5), )
In [ ]:
performance_country_by_sex <- runners_data |>
group_by(Country,Gender,Year) |>
filter(Place == 1 | Place == 2 | Place == 3, Year <= 2016) |>
count(Gender,sort =  TRUE) |>
rename(total = n) 
top_performance_country_by_sex <- performance_country_by_sex |> filter(Country %in% performance_country_filtered)
In [ ]:
ggplot(data = top_performance_country_by_sex, mapping = aes(x = Year, y = total,color = Gender)) + geom_line() + 
facet_wrap(~Country, ncol = 3) + theme_economist_white() + 
labs(
    title  = "Medalhas entre homens e mulheres",
     subtitle = wrapped_subtitle
)  + theme(
    plot.title = element_text(hjust = 0.5,size = 15),
    plot.subtitle = element_text(size = 15)
)
In [ ]:
performance_runners_men <- runners_men |>
group_by(Country) |>
filter(Year <= 2016) |>
summarise(
    Gold_medals = sum(Place == 1,na.rm = TRUE),
    Silver_medals = sum(Place == 2,na.rm = TRUE),
    Bronze_medals = sum(Place == 3,na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
)
In [ ]:
performance_runners_men <- performance_runners_men |>
pivot_longer(
  cols = c(Gold_medals, Silver_medals, Bronze_medals),
  names_to = "Medal",
  values_to = "Count"
)
performance_runners_men <-as.data.frame(performance_runners_men)
performance_runners_men <- performance_runners_men |> filter(Country %in% performance_country_filtered)
In [ ]:
subtitle_text <- "Separando o desempenho dos competidores homens em categorias de medalhas conquistadas,ouro,prata e bronze, respectivamente."
wrapped_subtitle <- str_wrap(subtitle_text, width = 95) 
ggplot(data = performance_runners_men, mapping = aes(x = reorder(Country,Count), y = Count,fill = Medal)) + 
geom_bar(stat = "identity",position = "dodge2") + coord_flip() + theme_wsj(color = "gray") + scale_fill_manual(values = color_per_medal) +
theme(
    legend.position = "bottom"
) + labs(
    title = "Performance dos países na categoria masculina",
    subtitle = wrapped_subtitle
) + theme(plot.title = element_text(hjust = 0.5),plot.subtitle = element_text(size = 19))
In [ ]:
performance_runners_women <- runners_women|>
group_by(Country) |>
filter(Year <= 2016) |>
summarise(
    Gold_medals = sum(Place == 1,na.rm = TRUE),
    Silver_medals = sum(Place == 2,na.rm = TRUE),
    Bronze_medals = sum(Place == 3,na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
)
In [ ]:
performance_runners_women <- performance_runners_women |>
pivot_longer(
  cols = c(Gold_medals, Silver_medals, Bronze_medals),
  names_to = "Medal",
  values_to = "Count"
)
performance_runners_women <-as.data.frame(performance_runners_women)
performance_runners_women <- performance_runners_women |> filter(Country %in% performance_country_filtered)
In [ ]:
subtitle_text <- "Separando o desempenho das competidoras mulheres em categorias de medalhas conquistadas,ouro,prata e bronze, respectivamente."
wrapped_subtitle <- str_wrap(subtitle_text, width = 95) 
ggplot(data = performance_runners_women, mapping = aes(x = reorder(Country,Count), y = Count,fill = Medal)) + 
geom_bar(stat = "identity",position = "dodge2") + coord_flip() + theme_wsj(color = "gray") + scale_fill_manual(values = color_per_medal) +
theme(
    legend.position = "bottom"
) + labs(
    title = "Performance dos países na categoria feminina",
    subtitle = wrapped_subtitle
) + theme(plot.title = element_text(hjust = 0.5),plot.subtitle = element_text(size = 19))

Relação entre a idade e velocidade¶

In [ ]:
fastest_time <- runners_data |>
group_by(Time,Age) |>
arrange(Time,sort = TRUE)

temp_list <- c()
temp_list_1 <- c("Age<=20","20<Age<25","25<Age<30","30<Age<35","Age>=35")
 
temp_list <- append(temp_list,fastest_time |> filter(Age <= 20) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age <= 25 & Age >= 20 ) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age <= 30 & Age >= 25 ) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age <= 35 & Age >= 30 ) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age >= 35) |> nrow())

fastest_time <- data.frame(Age = temp_list_1,Total = temp_list)
In [ ]:
ggplot(data = fastest_time, mapping = aes(x = Age, y = Total, fill = Age)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c("gray", "#4F6D7A", "gray","gray","gray")) +
  theme_wsj(color = "gray") +
  labs(title = "Idade média dos corredores") +
  theme(plot.title = element_text(hjust = 0.5),legend.position = "bottom")
In [ ]:
fastest_time <- runners_data |>
group_by(Time,Age) |>
filter(Event == "100 m") |>
arrange(Time,sort = TRUE)

mean_100m <- mean(fastest_time$Age)
temp_list <- c()
temp_list_1 <- c("Age<=20","20<Age<25","25<Age<30","30<Age<35","Age>=35")
 
temp_list <- append(temp_list,fastest_time |> filter(Age <= 20) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age <= 25 & Age >= 20 ) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age <= 30 & Age >= 25 ) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age <= 35 & Age >= 30 ) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age >= 35) |> nrow())

fastest_time <- data.frame(Age = temp_list_1,Total = temp_list)

ggplot(data = fastest_time, mapping = aes(x = Age,fill = Age)) +
  geom_bar(mapping = aes(y = Total), stat = "identity") +
  scale_fill_manual(values = c("#56A3A6", "#4F6D7A","gray","gray","gray")) +
  theme_wsj(color = "gray") +
  labs(title = "Idade média entre os corredores de 100m") + theme(
    plot.title = element_text(hjust = 0.5), 
    legend.position = "bottom"
  )
In [ ]:
mean_100m
26.4383940932164
In [ ]:
fastest_time <- runners_data |>
group_by(Time,Age) |>
filter(Event == "Marathon") |>
arrange(Time,sort = TRUE) |>
na.omit()

mean_marathon <- mean(fastest_time$Age)
temp_list <- c()
temp_list_1 <- c("Age<=20","20<Age<25","25<Age<30","30<Age<35","Age>=35")
 
temp_list <- append(temp_list,fastest_time |> filter(Age <= 20) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age <= 25 & Age >= 20 ) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age <= 30 & Age >= 25 ) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age <= 35 & Age >= 30 ) |> nrow())
temp_list <- append(temp_list,fastest_time |> filter(Age >= 35) |> nrow())

fastest_time <- data.frame(Age = temp_list_1,Total = temp_list)

ggplot(data = fastest_time, mapping = aes(x = Age, fill = Age)) +
  geom_bar(mapping = aes(y = Total), stat = "identity") +
  scale_fill_manual(values = c("gray", "#4F6D7A","#56A3A6","gray","gray")) +
  theme_wsj(color = "gray") +
  labs(title = "Idade média entre os corredores das maratonas") + theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
    
  )
In [ ]:
mean_marathon
28.3914666666667

Corredores com a maior quantidade de medalhas¶

In [ ]:
most_medals_men <- runners_men |>
group_by(Name) |>
filter(Place == 1 | Place == 2 | Place == 3) |>
count(Name,sort = TRUE)

most_medals_men <- head(most_medals_men,10)
head(most_medals_men)
most_medals_men_list = as.list(most_medals_men$Name)
A grouped_df: 6 x 2
Namen
<chr><int>
Michael Johnson 122
Usain Bolt 107
Asafa Powell 106
LaShawn Merritt 97
Frank Fredericks 81
Justin Gatlin 78
In [ ]:
medals_over_time_fastest_men <- runners_men |>
filter(Name %in% most_medals_men_list) |>
group_by(Year) |>
filter(Place == 1 | Place == 2 | Place == 3) |>
count(Name,sort = TRUE)

ggplot(data = medals_over_time_fastest_men, mapping = aes(x = Year, y = n,color = Name)) + geom_line(linetype = "dashed") +
theme_excel_new()
Warning message:
“Removed 1 row containing missing values (`geom_line()`).”
In [ ]:
medals_over_time_fastest_men <- runners_men |>
filter(Name %in% most_medals_men_list) |>
group_by(Year) |>
filter(Place == 1 | Place == 2 | Place == 3) |>
count(Name,sort = TRUE)

ggplot(data = medals_over_time_fastest_men, mapping = aes(x = Year, y = n,color = Name)) + geom_line(alpha = 0.3) +
geom_line(data = subset(medals_over_time_fastest_men, Name == "Usain Bolt"), size = 1) +
geom_line(data = subset(medals_over_time_fastest_men, Name == "Michael Johnson"), size = 1) +
theme_excel_new() + annotate("text", x = 1992, y = 16.5, label = "Michael Johnson",size =5) +
geom_segment(
    aes(x = 1992, y =16 , xend = 1995, yend = 15),
    arrow = arrow(length = unit(0.3, "cm")),
    color = "#2E4F4F",
    size = 0.5
)   + annotate("text", x = 2010, y = 16.5, label = "Usain Bolt",size =5) +
geom_segment(
    aes(x = 2010, y = 16 , xend = 2008.4, yend = 15),
    arrow = arrow(length = unit(0.3, "cm")),
    color = "#2E4F4F",
    size = 0.5
)
Warning message:
“Removed 1 row containing missing values (`geom_line()`).”
In [ ]:
most_medals_women <- runners_women |>
group_by(Name) |>
filter(Place == 1 | Place == 2 | Place == 3) |>
count(Name,sort = TRUE)

most_medals_women <- head(most_medals_women,10)
head(most_medals_women)
most_medals_women_list = as.list(most_medals_women$Name)
A grouped_df: 6 x 2
Namen
<chr><int>
Merlene Ottey 174
Gwen Torrence 95
Veronica Campbell-Brown 84
Sanya Richards-Ross 83
Maria Mutola 80
Allyson Felix 79
In [ ]:
medals_over_time_fastest_women <- runners_women |>
filter(Name %in% most_medals_women_list) |>
group_by(Year) |>
filter(Place == 1 | Place == 2 | Place == 3) |>
count(Name,sort = TRUE)

ggplot(data = medals_over_time_fastest_women, mapping = aes(x = Year, y = n,color = Name)) + geom_line(linetype = "dashed") +
theme_excel_new()
In [ ]:
ggplot(data = medals_over_time_fastest_women, mapping = aes(x = Year, y = n,color = Name)) + geom_line(alpha = 0.3) +
geom_line(data = subset(medals_over_time_fastest_women, Name == "Merlene Ottey"), size = 1) +
theme_excel_new() + annotate("text", x = 1988, y = 20.5, label = "Merlene Ottey",size =5) +
geom_segment(
    aes(x = 1988, y = 20, xend = 1990, yend = 18),
    arrow = arrow(length = unit(0.3, "cm")),
    color = "#2E4F4F",
    size = 0.5
)

Melhores competidores dos 100 metros¶

In [ ]:
runners_men_100m <- runners_men |>
filter(Event == "100 m") |>
filter(Place == 1 | Place == 2 | Place == 3) |>
count(Name,sort = TRUE)


top_100m_men <- head(runners_men_100m,5)
top_100m_men_list <- as.list(top_100m_men$Name)
In [ ]:
performance_runners_men <- runners_men |>
group_by(Name) |>
filter(Year <= 2016) |>
summarise(
    Gold_medals = sum(Place == 1,na.rm = TRUE),
    Silver_medals = sum(Place == 2,na.rm = TRUE),
    Bronze_medals = sum(Place == 3,na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
)
In [ ]:
head(performance_runners_men)
top_100m_men_list
A tibble: 6 x 5
NameGold_medalsSilver_medalsBronze_medalsTotal
<chr><int><int><int><int>
Aaron Armstrong 1001
Aaron Brown 4206
Aaron Ernest 1001
Abadi Hadis 0011
Abayneh Ayele 0000
Abdalelah Haroun4105
  1. 'Asafa Powell'
  2. 'Justin Gatlin'
  3. 'Maurice Greene'
  4. 'Usain Bolt'
  5. 'Tyson Gay'
In [ ]:
performance_runners_men <- performance_runners_men |>
pivot_longer(
  cols = c(Gold_medals, Silver_medals, Bronze_medals),
  names_to = "Medal",
  values_to = "Count"
)
performance_runners_men <-as.data.frame(performance_runners_men)
performance_runners_men <- performance_runners_men |> filter(Name %in% top_100m_men_list)
In [ ]:
ggplot(data = performance_runners_men, mapping = aes(x = reorder(Name,Count), y = Count,fill = Medal)) + 
geom_bar(stat = "identity",position = "dodge2") + coord_flip() + theme_wsj(color = "gray") + scale_fill_manual(values = color_per_medal) +
labs(
    title = "Maiores corredores dos 100 metros"
) + theme(
    legend.position = "bottom"
)
In [ ]:
runners_women100m <- runners_women |>
filter(Event == "100 m") |>
filter(Place == 1 | Place == 2 | Place == 3) |>
count(Name,sort = TRUE)

top_100m_women <- head(runners_women100m,5)
top_100m_women_list <- as.list(top_100m_women$Name)
In [ ]:
performance_runners_women <- runners_women |>
group_by(Name) |>
filter(Year <= 2016) |>
summarise(
    Gold_medals = sum(Place == 1,na.rm = TRUE),
    Silver_medals = sum(Place == 2,na.rm = TRUE),
    Bronze_medals = sum(Place == 3,na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
)
In [ ]:
head(performance_runners_women)
top_100m_women_list
A tibble: 6 x 5
NameGold_medalsSilver_medalsBronze_medalsTotal
<chr><int><int><int><int>
Ababel Yeshaneh0101
Abeba Aregawi 5308
Abebe Arigawi 5128
Abebe Tola 0000
Abebech Afework1113
Abebu Gelan 0000
  1. 'Merlene Ottey'
  2. 'Veronica Campbell-Brown'
  3. 'Carmelita Jeter'
  4. 'Gwen Torrence'
  5. 'Marion Jones'
In [ ]:
performance_runners_women <- performance_runners_women |>
pivot_longer(
  cols = c(Gold_medals, Silver_medals, Bronze_medals),
  names_to = "Medal",
  values_to = "Count"
)
performance_runners_women <-as.data.frame(performance_runners_women)
performance_runners_women <- performance_runners_women |> filter(Name %in% top_100m_women_list)
In [ ]:
ggplot(data = performance_runners_women, mapping = aes(x = reorder(Name,Count), y = Count,fill = Medal)) + 
geom_bar(stat = "identity",position = "dodge2") + coord_flip()  + theme_wsj(color = "gray") + scale_fill_manual(values = color_per_medal) +
labs(
    title = "Maiores corredoras dos 100 metros"
) + theme(
    legend.position = "bottom"
    
)
In [ ]:
fastest_time_100m_men <- runners_men |>
filter(Event == "100 m") |>
arrange(Time)
top_50_fastest_time_100m_men <- head(fastest_time_100m_men,50)

ggplot(data = top_50_fastest_time_100m_men, mapping = aes(x = Age,y = Time,color = Name)) + geom_point(mapping = aes(group = Name))  +
annotate(
    geom = "rect", xmin = 21, xmax = 28,
    ymin = 0, ymax = 7, fill = "orange", alpha = 0.2
) + theme_bw() + guides(color =  FALSE) +
geom_text_repel(data = subset(top_50_fastest_time_100m_men, Time <= "00:00:09.740000"), mapping = aes(label = Name),size = 3.5, vjust = 0.2,hjust = 0.2,fontface = "italic")

Melhores competidores das maratonas¶

In [ ]:
head(
runners_men |>
filter(Event == "Marathon") |>
filter(Place == 1 | Place == 2 | Place == 3) |>
count(Name,sort = TRUE, .keep = Time)
,3)
A data.frame: 3 x 3
Name.keepn
<chr><chr><int>
1Abel Kirui 02:07:382
2Paul Biwott 02:08:172
3Abdelkader El Mouaziz02:07:111
In [ ]:
Abel_Kirui_runner <- subset(runners_data, Name == "Abel Kirui" & Event == "Marathon")
In [ ]:
head(
runners_women |>
filter(Event == "Marathon") |>
filter(Place == 1 | Place == 2 | Place == 3) |>
count(Name,sort = TRUE, .keep = Time)
,3)
A data.frame: 3 x 3
Name.keepn
<chr><chr><int>
1Aselefech Mergia 02:25:322
2Jelena Prokopcuka02:24:072
3Mare Dibaba 02:19:522
In [ ]:
fastest_time_marathon_men <- runners_men |>
filter(Event == "Marathon") |>
arrange(Time)
top_50_fastest_time_marathon_men <- head(fastest_time_marathon_men,50)

ggplot(data = top_50_fastest_time_marathon_men, mapping = aes(x = Age, Time,color = Name)) + geom_point(mapping = aes(group = Name)) +
annotate(
    geom = "rect", xmin = 28, xmax = 36,
    ymin = 0, ymax = 14, fill = "orange", alpha = 0.2
    ) + theme_bw() + guides(color =  FALSE) +
geom_text_repel(data = subset(top_50_fastest_time_marathon_men, Time <= "02:03:58"), mapping = aes(label = Name),size = 3, vjust = 0.2,hjust = 0.2,)

Evolução dos corredores¶

In [ ]:
performance_country_filtered <- head(performance_country$Country,5)
performance_country_filtered <- as.list(performance_country_filtered)
countries_performance_per_year <- runners_data |>
group_by(Country,Year) |>
filter(Place == 1 | Place == 2 | Place == 3, Year <= 2016) |>
count(Country,sort =  TRUE) |>
rename(total = n) 
top_countries_performance_per_year <- countries_performance_per_year |> filter(Country %in% performance_country_filtered)
head(top_countries_performance_per_year)
A grouped_df: 6 x 3
CountryYeartotal
<chr><dbl><int>
KEN2011199
KEN2012198
KEN2016190
KEN2014183
KEN2015174
KEN2010168
In [ ]:
top_countries_performance_per_year$Year <- as.Date(top_countries_performance_per_year$Year)
In [ ]:
glimpse(top_countries_performance_per_year)
Rows: 203
Columns: 3
Groups: Country, Year [203]
$ Country <chr> "KEN", "KEN", "KEN", "KEN", "KEN", "KEN", "USA", "KEN", "KEN",…
$ Year    <date> 1975-07-05, 1975-07-06, 1975-07-10, 1975-07-08, 1975-07-09, 1…
$ total   <int> 199, 198, 190, 183, 174, 168, 166, 143, 143, 131, 123, 118, 11…
In [ ]:
ggplot(data = top_countries_performance_per_year, mapping = aes(x = Year, y = total, color = Country)) +
scale_x_date(date_labels = "%Y", date_breaks = "1 year") + 
geom_line(size = 2, alpha = 0.75) + theme_minimal() + transition_reveal(Year) +  theme(plot.margin = margin(5.5, 40, 5.5, 5.5))
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
In [ ]:
p <- ggplot(data = top_countries_performance_per_year, mapping = aes(x = Year, y = total, color = Country)) +
  scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
  geom_line(size = 2, alpha = 0.75) +
  theme_minimal() +
  transition_reveal(Year) + theme_wsj(color = "gray")
animate(p, width = 900, height = 700)
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
# A tibble: 100 × 7
   format width height colorspace matte filesize density
   <chr>  <int>  <int> <chr>      <lgl>    <int> <chr>  
 1 gif      900    700 sRGB       TRUE         0 72x72  
 2 gif      900    700 sRGB       TRUE         0 72x72  
 3 gif      900    700 sRGB       TRUE         0 72x72  
 4 gif      900    700 sRGB       TRUE         0 72x72  
 5 gif      900    700 sRGB       TRUE         0 72x72  
 6 gif      900    700 sRGB       TRUE         0 72x72  
 7 gif      900    700 sRGB       TRUE         0 72x72  
 8 gif      900    700 sRGB       TRUE         0 72x72  
 9 gif      900    700 sRGB       TRUE         0 72x72  
10 gif      900    700 sRGB       TRUE         0 72x72  
# ℹ 90 more rows